package com.bruce.tool.address.spider;

import com.bruce.tool.address.mysql.domain.Region;
import com.bruce.tool.address.mysql.service.RegionManage;
import com.bruce.tool.address.mysql.service.RegionService;
import com.bruce.tool.address.spider.constant.RegionType;
import com.bruce.tool.address.spider.dto.RegionTable;
import com.bruce.tool.address.spider.handler.RegoinFetcher;
import com.bruce.tool.common.util.LogUtils;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.lang3.StringUtils;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Component;
import org.springframework.util.CollectionUtils;

import java.util.List;

/**
 * Crawls region data starting from {@link #ROOT_URL} and persists it level by level
 * (province, then city, then county) through {@link RegionManage#saveAll}.
 *
 * <p>Each level is fetched with {@link RegoinFetcher#fetchRegions}, using the {@code href}
 * of every region of the previous level as the page to fetch. A level that yields no
 * regions at all aborts the run with an error log.
 *
 * @author : Bruce(刘正航) 2:57 PM 2018/11/19
 */
@Slf4j
@Component
public class Spider implements SpiderRunner {

    /** Root URL of the listing; also passed as the referer when fetching the province page. */
    private static final String ROOT_URL = "http://www.stats.gov.cn/tjsj/tjbz/tjyqhdmhcxhfdm/";

    /** Year whose data set is looked up under {@link #ROOT_URL}. */
    private static final String TARGET_YEAR = "2019";

    @Autowired
    private RegionManage regionManage;

    /**
     * Runs the crawl: resolves the entry page for {@link #TARGET_YEAR}, then fetches and
     * saves provinces, cities and counties in that order. Returns early (after logging an
     * error) when the entry URL is blank or when a whole level comes back empty.
     */
    @Override
    public void start() {
        String entryUrl = RegoinFetcher.fetchYear(TARGET_YEAR, ROOT_URL);
        if (StringUtils.isBlank(entryUrl)) {
            LogUtils.error(log, "{}", "地址错误,请确认有对应年限的数据!");
            return;
        }

        RegionTable regionTable = new RegionTable();

        // All provinces: fetched from the entry page itself, with no parent region.
        List<Region> provinceInfos = RegoinFetcher.fetchRegions(RegionType.PROVINCE, entryUrl, ROOT_URL, null);
        if (CollectionUtils.isEmpty(provinceInfos)) {
            LogUtils.error(log, "{}", "未获取到省数据,请确认解析逻辑的正确性!");
            return;
        }
        regionTable.setProvinces(provinceInfos);
        LogUtils.info(log, "抓取完省数据");
        regionManage.saveAll(regionTable.getProvinces());

        // All cities: every province page is requested with the entry URL as referer.
        collectChildren(RegionType.CITY, provinceInfos, entryUrl, regionTable.getCities());
        if (CollectionUtils.isEmpty(regionTable.getCities())) {
            LogUtils.error(log, "{}", "未获取到城市数据,请确认解析逻辑的正确性!");
            return;
        }
        LogUtils.info(log, "抓取完市数据");
        regionManage.saveAll(regionTable.getCities());

        // All districts/counties: each city supplies its own referer.
        collectChildren(RegionType.COUNTY, regionTable.getCities(), null, regionTable.getCounties());
        if (CollectionUtils.isEmpty(regionTable.getCounties())) {
            LogUtils.error(log, "{}", "未获取到区/县数据,请确认解析逻辑的正确性!");
            return;
        }
        regionManage.saveAll(regionTable.getCounties());

        // Special handling (currently disabled).
        // Chongqing has both districts and counties below it; the intent is to put them all
        // under a single parent node, following Taobao's address structure:
        //   update global_region set pcode = '500100000000' where pcode = '500200000000'
        //     -> regionService.updateDirectInfo();
        //   delete from global_region where code = '500200000000'
        //     -> regionService.deleteDirectInfo();
        LogUtils.info(log, "抓取完区/县数据");

        // Town and street levels are not fetched for now:
        //   fetchRestRegionInfo(regionTable);

        LogUtils.info(log, "所有地址数据保存完毕.");

        // Remaining work: add detailed addresses for Hong Kong, Macau and Taiwan.
    }

    /**
     * Fetches and saves town-level and then street-level regions below the counties already
     * held in {@code regionTable}.
     *
     * <p>Three-level addresses do not need this data, and fetching it takes a long time, so
     * it is not recommended unless required. The only call site in this class is currently
     * commented out.
     *
     * @param regionTable table whose counties are used as parents and whose town and street
     *                    lists receive the fetched regions
     */
    private void fetchRestRegionInfo(RegionTable regionTable) {
        // All towns.
        collectChildren(RegionType.TOWN, regionTable.getCounties(), null, regionTable.getTowns());
        if (CollectionUtils.isEmpty(regionTable.getTowns())) {
            LogUtils.error(log, "{}", "未获取到城镇数据,请确认解析逻辑的正确性!");
            return;
        }
        LogUtils.info(log, "抓取完乡镇数据");
        regionManage.saveAll(regionTable.getTowns());

        // All streets.
        collectChildren(RegionType.STREET, regionTable.getTowns(), null, regionTable.getStreets());
        if (CollectionUtils.isEmpty(regionTable.getStreets())) {
            LogUtils.error(log, "{}", "未获取到街道数据,请确认解析逻辑的正确性!");
            return;
        }
        regionManage.saveAll(regionTable.getStreets());
        LogUtils.info(log, "抓取完街道数据");
    }

    /**
     * Fetches the child regions of every parent that has a non-blank {@code href} and appends
     * them to {@code target}.
     *
     * @param type         region level to request from {@link RegoinFetcher#fetchRegions}
     * @param parents      regions of the previous level; parents with a blank href are skipped
     * @param fixedReferer referer to send for every request, or {@code null} to use each
     *                     parent's own {@code getReferer()}
     * @param target       list that receives the fetched regions
     */
    private void collectChildren(RegionType type, List<Region> parents, String fixedReferer, List<Region> target) {
        for (Region parent : parents) {
            String href = parent.getHref();
            if (StringUtils.isBlank(href)) {
                continue;
            }
            String referer = fixedReferer != null ? fixedReferer : parent.getReferer();
            List<Region> children = RegoinFetcher.fetchRegions(type, href, referer, parent);
            if (children == null) {
                // addAll(null) would throw and abort the whole crawl; skip just this parent.
                LogUtils.error(log, "{}", "未获取到下级数据,已跳过: " + href);
                continue;
            }
            target.addAll(children);
        }
    }
}
